"""
共享单车投放量预测
"""
import pandas as pd
import sklearn.model_selection as ms
import sklearn.ensemble as se
import sklearn.metrics as sm
# 1. Load the data set.
data = pd.read_csv('bike_day.csv')

# 2. Drop the columns that are not used as features.
#    NOTE(review): presumably `instant`/`dteday` are row identifiers and
#    `casual`/`registered` are components of `cnt` (keeping them would leak
#    the target) -- confirm against the data set description.
data = data.drop(columns=['instant', 'dteday', 'casual', 'registered'])

# 3. Separate inputs and output.
#    The target is selected by name rather than by position so the script
#    agrees with the `data['cnt']` lookup at the bottom and does not silently
#    train on the wrong column if the CSV column order ever changes.
x = data.drop(columns='cnt')
y = data['cnt']

# 4. Split into training and test sets (80% / 20%, fixed seed).
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y, test_size=0.2, random_state=7)

# 5. Build the model.
#    random_state is fixed here as well: the split above is already seeded,
#    but an unseeded forest would still make the reported metrics differ
#    from run to run.
model = se.RandomForestRegressor(
    max_depth=10,
    n_estimators=1000,
    min_samples_split=9,
    random_state=7,
)
# Train the model.
model.fit(train_x, train_y)
# Predict on the held-out test set.
pred_test_y = model.predict(test_x)

# Evaluation on the test set: R2 score, then mean absolute error.
print(sm.r2_score(test_y, pred_test_y))
print(sm.mean_absolute_error(test_y, pred_test_y))

# Mean of the target over the whole data set, printed as a reference scale
# for the mean absolute error above.
print(data['cnt'].mean())
